Data Preparation and
Setup
data_2020 <- read.csv("2020.csv")
data_2025 <- readRDS("crew_dataset_keys_analysis_ready.rds")
age_order <- c("Under 25", "25-29", "30-34", "35-39", "40-44", "45-49",
"50-54", "55-59", "60-64", "65 or older")
standardize_gender <- function(gender_col) {
gender_col <- tolower(trimws(gender_col))
case_when(
gender_col %in% c("female", "woman") ~ "Women",
gender_col %in% c("male", "man") ~ "Men",
grepl("nonbinary|non-binary|gender variant", gender_col) ~ "Nonbinary",
TRUE ~ as.character(gender_col)
)
}
df_2020 <- data_2020 %>%
mutate(
gender = standardize_gender(m27),
age = factor(m25, levels = age_order)
) %>%
filter(!is.na(gender) & !is.na(age) & gender != "" & age != "Decline to Answer")
clean_numeric_column <- function(col) {
col_char <- as.character(col)
col_clean <- gsub(",", "", col_char)
col_clean <- gsub("\\s+", "", col_clean)
col_numeric <- suppressWarnings(as.numeric(col_clean))
col_numeric[col_numeric > 10000000] <- NA
col_numeric[col_numeric < 0] <- 0
return(col_numeric)
}
df_2025 <- data_2025 %>%
mutate(
gender = standardize_gender(m27),
age = factor(m25, levels = age_order),
base_salary = clean_numeric_column(`m21A[1_SQ001]`),
bonus = clean_numeric_column(`m21A[2_SQ001]`),
commission = clean_numeric_column(`m21A[3_SQ001]`),
profit_sharing = clean_numeric_column(`m21A[4_SQ001]`),
long_term_incentive = clean_numeric_column(`m21A[5_SQ001]`),
total_comp_reported = clean_numeric_column(`m28B`),
specialization_group = case_when(
m1B %in% c("Asset/Property Management", "Corporate Real Estate", "Portfolio Management") ~ "Asset Management",
m1B %in% c("Brokerage/Sales/Leasing") ~ "Brokerage",
m1B %in% c("Acquisitions/ Dispositions", "Architecture and Design", "Construction",
"Development", "Economic Development", "Engineering", "Environmental",
"Interior Design", "Investments") ~ "Development",
m1B %in% c("Accounting", "Appraisal/Valuation", "Consulting", "Executive Management",
"Finance/Lending/Mortgage", "Human Resources", "Law",
"Marketing/Business Development", "Sustainability", "Title/Escrow") ~ "Finance",
TRUE ~ "Other"
),
ethnicity = case_when(
`G20Q70` == "White (Non-Hispanic)" ~ "White",
`G20Q70` == "African-American/Black" ~ "Black",
`G20Q70` == "Asian" ~ "Asian",
`G20Q70` == "Hispanic/Latinx (Any Race)" ~ "Hispanic/Latinx",
`G20Q70` == "Other/mixed" ~ "Other/Mixed",
`G20Q70` %in% c("Native American or Alaskan", "Middle Eastern or North African") ~ "Other/Mixed",
`G20Q70` == "Prefer not to answer" ~ NA_character_,
TRUE ~ NA_character_
),
total_compensation = case_when(
!is.na(total_comp_reported) & total_comp_reported > 0 ~ total_comp_reported,
TRUE ~ (replace_na(base_salary, 0) + replace_na(bonus, 0) +
replace_na(commission, 0) + replace_na(profit_sharing, 0) +
replace_na(long_term_incentive, 0))
)
) %>%
filter(!is.na(gender) & !is.na(age) & gender != "" & age != "Decline to Answer")
gender_colors <- c("Women" = "#e74c3c", "Men" = "#3498db", "Nonbinary" = "#9b59b6")
ethnicity_colors <- c("White" = "#3498db", "Black" = "#e74c3c", "Asian" = "#f39c12",
"Hispanic/Latinx" = "#27ae60", "Other/Mixed" = "#9b59b6")
Age Distribution
Weighting Framework
women_2020_age_counts <- df_2020 %>%
filter(gender == "Women") %>%
count(age, name = "count_2020") %>%
mutate(prop_2020 = count_2020 / sum(count_2020))
kable(women_2020_age_counts,
caption = "2020 Women Age Distribution (Reference for Weighting)",
col.names = c("Age Group", "Count 2020", "Proportion 2020"),
digits = 3) %>%
kable_styling(bootstrap_options = c("striped", "hover", "condensed"),
full_width = FALSE, position = "center") %>%
column_spec(1, bold = TRUE) %>%
row_spec(0, bold = TRUE, color = "white", background = "#3498db")
2020 Women Age Distribution (Reference for Weighting)
|
Age Group
|
Count 2020
|
Proportion 2020
|
|
Under 25
|
38
|
0.018
|
|
25-29
|
164
|
0.077
|
|
30-34
|
271
|
0.127
|
|
35-39
|
325
|
0.153
|
|
40-44
|
292
|
0.137
|
|
45-49
|
263
|
0.123
|
|
50-54
|
266
|
0.125
|
|
55-59
|
268
|
0.126
|
|
60-64
|
172
|
0.081
|
|
65 or older
|
72
|
0.034
|
women_age_weights <- women_2020_age_counts %>%
select(age, weight = prop_2020)
theme_compensation <- theme_minimal() +
theme(
plot.title = element_text(size = 16, face = "bold", hjust = 0.5, margin = margin(b = 20)),
plot.subtitle = element_text(size = 12, hjust = 0.5, color = "gray40", margin = margin(b = 15)),
axis.title = element_text(size = 12, face = "bold"),
axis.text = element_text(size = 10),
legend.title = element_text(size = 12, face = "bold"),
legend.text = element_text(size = 10),
legend.position = "bottom",
panel.grid.minor = element_blank(),
panel.grid.major.x = element_line(color = "gray90", size = 0.3),
panel.grid.major.y = element_line(color = "gray90", size = 0.3),
strip.text = element_text(size = 11, face = "bold"),
plot.background = element_rect(fill = "white", color = NA),
panel.background = element_rect(fill = "white", color = NA)
)
format_currency <- function(x) {
scales::dollar_format(accuracy = 1)(x)
}
apply_women_weighting <- function(data, question_col, show_breakdown = FALSE) {
plot_data <- data %>%
select(gender, age, response = all_of(question_col)) %>%
filter(!is.na(response) & response != "")
total_responses <- nrow(plot_data)
men_data <- plot_data %>% filter(gender == "Men")
women_data <- plot_data %>% filter(gender == "Women")
men_summary <- men_data %>%
count(response) %>%
mutate(
percent = n / sum(n) * 100,
global_percent = n / total_responses * 100,
gender = "Men",
weighted_percent = percent
)
women_by_age <- women_data %>%
count(age, response) %>%
group_by(age) %>%
mutate(percent_in_age = n / sum(n) * 100) %>%
ungroup()
women_weighted <- women_by_age %>%
left_join(women_age_weights, by = "age") %>%
mutate(weighted_contribution = percent_in_age * weight) %>%
group_by(response) %>%
summarise(
weighted_percent = sum(weighted_contribution, na.rm = TRUE),
.groups = "drop"
) %>%
mutate(gender = "Women")
final_results <- bind_rows(
men_summary %>% select(response, gender, weighted_percent),
women_weighted %>% select(response, gender, weighted_percent)
)
return(final_results)
}
apply_compensation_weighting <- function(data, salary_col, group_col = NULL) {
if (is.character(data[[salary_col]])) {
data[[salary_col]] <- as.numeric(data[[salary_col]])
}
if (is.null(group_col)) {
plot_data <- data %>%
select(gender, age, salary = all_of(salary_col)) %>%
filter(!is.na(salary) & salary > 0 & !is.infinite(salary))
} else {
plot_data <- data %>%
select(gender, age, salary = all_of(salary_col), group = all_of(group_col)) %>%
filter(!is.na(salary) & salary > 0 & !is.infinite(salary) & !is.na(group) & group != "")
}
men_data <- plot_data %>% filter(gender == "Men")
women_data <- plot_data %>% filter(gender == "Women")
if (is.null(group_col)) {
men_stats <- men_data %>%
summarise(
mean_salary = mean(salary, na.rm = TRUE),
median_salary = median(salary, na.rm = TRUE),
gender = "Men"
)
women_by_age <- women_data %>%
group_by(age) %>%
summarise(
mean_salary = mean(salary, na.rm = TRUE),
median_salary = median(salary, na.rm = TRUE),
.groups = "drop"
) %>%
left_join(women_age_weights, by = "age") %>%
filter(!is.na(weight))
women_weighted <- women_by_age %>%
summarise(
mean_salary = sum(mean_salary * weight, na.rm = TRUE),
median_salary = sum(median_salary * weight, na.rm = TRUE),
gender = "Women",
.groups = "drop"
)
return(bind_rows(men_stats, women_weighted))
} else {
men_stats <- men_data %>%
group_by(group) %>%
summarise(
mean_salary = mean(salary, na.rm = TRUE),
median_salary = median(salary, na.rm = TRUE),
gender = "Men",
.groups = "drop"
)
women_by_age_group <- women_data %>%
group_by(age, group) %>%
summarise(
mean_salary = mean(salary, na.rm = TRUE),
median_salary = median(salary, na.rm = TRUE),
.groups = "drop"
) %>%
left_join(women_age_weights, by = "age") %>%
filter(!is.na(weight))
women_weighted <- women_by_age_group %>%
group_by(group) %>%
summarise(
mean_salary = sum(mean_salary * weight, na.rm = TRUE),
median_salary = sum(median_salary * weight, na.rm = TRUE),
gender = "Women",
.groups = "drop"
)
return(bind_rows(men_stats, women_weighted))
}
}
calculate_pay_gap <- function(data) {
women_salary <- data %>% filter(gender == "Women") %>% pull(mean_salary)
men_salary <- data %>% filter(gender == "Men") %>% pull(mean_salary)
if (length(women_salary) > 0 && length(men_salary) > 0) {
round((1 - women_salary/men_salary) * 100, 1)
} else {
NA
}
}
Comprehensive
Compensation Analysis
Base Salary Analysis
by Specialization
Mean Base Salary by
Specialization
salary_spec_data <- apply_compensation_weighting(df_2025, "base_salary", "specialization_group")
salary_mean_table <- salary_spec_data %>%
select(specialization = group, gender, mean_salary) %>%
pivot_wider(names_from = gender, values_from = mean_salary, names_prefix = "") %>%
mutate(Gap = round((1 - Women/Men) * 100, 1)) %>%
arrange(desc(Men))
# Add overall mean row
overall_mean_women <- salary_spec_data %>% filter(gender == "Women") %>% summarise(avg = mean(mean_salary)) %>% pull(avg)
overall_mean_men <- salary_spec_data %>% filter(gender == "Men") %>% summarise(avg = mean(mean_salary)) %>% pull(avg)
mean_row <- data.frame(
specialization = "OVERALL MEAN",
Women = overall_mean_women,
Men = overall_mean_men
) %>%
mutate(Gap = round((1 - Women/Men) * 100, 1))
final_mean_salary_table <- bind_rows(salary_mean_table, mean_row)
kable(final_mean_salary_table,
caption = "Mean Annual Base Salary by Specialization (Excluding Commissions and Bonuses)",
col.names = c("Specialization", "Women", "Men", "Gap %"),
format.args = list(big.mark = ",")) %>%
kable_styling(bootstrap_options = c("striped", "hover", "condensed")) %>%
row_spec(nrow(final_mean_salary_table), bold = TRUE, background = "#e74c3c", color = "white")
Mean Annual Base Salary by Specialization (Excluding Commissions and
Bonuses)
|
Specialization
|
Women
|
Men
|
Gap %
|
|
Development
|
210,767.64
|
118,091.68
|
44.0
|
|
Finance
|
132,951.47
|
134,633.58
|
-1.3
|
|
Other
|
108,731.11
|
97,331.19
|
10.5
|
|
Asset Management
|
101,926.70
|
134,754.26
|
-32.2
|
|
Brokerage
|
70,653.76
|
92,044.34
|
-30.3
|
|
OVERALL MEAN
|
125,006.14
|
115,371.01
|
7.7
|
# Plot for mean salaries
ggplot(salary_spec_data, aes(x = reorder(group, mean_salary), y = mean_salary, fill = gender)) +
geom_col(position = "dodge", alpha = 0.8, width = 0.7) +
geom_text(aes(label = format_currency(mean_salary)),
position = position_dodge(width = 0.7), hjust = -0.1, size = 3.5) +
scale_fill_manual(values = gender_colors) +
scale_y_continuous(labels = format_currency, expand = expansion(mult = c(0, 0.15))) +
coord_flip() +
labs(
title = "Mean Base Salary by Specialization and Gender",
subtitle = "Women's salaries weighted by 2020 age distribution",
x = "Specialization",
y = "Mean Annual Base Salary",
fill = "Gender",
caption = "Source: 2025 Survey Data"
) +
theme_compensation

Median Base Salary
by Specialization
# Calculate median salary by specialization using proper weighting approach
median_salary_spec <- df_2025 %>%
filter(!is.na(base_salary) & base_salary > 0 & !is.na(specialization_group)) %>%
group_by(specialization_group, gender) %>%
summarise(median_salary = median(base_salary, na.rm = TRUE), .groups = "drop")
salary_median_table <- median_salary_spec %>%
select(specialization = specialization_group, gender, median_salary) %>%
pivot_wider(names_from = gender, values_from = median_salary, names_prefix = "") %>%
arrange(desc(coalesce(Men, Women, 0)))
# Ensure we have columns for Women and Men even if missing
if(!"Women" %in% names(salary_median_table)) salary_median_table$Women <- NA
if(!"Men" %in% names(salary_median_table)) salary_median_table$Men <- NA
# Calculate gap and reorder columns
salary_median_table <- salary_median_table %>%
mutate(Gap = round((1 - Women/Men) * 100, 1)) %>%
select(specialization, Women, Men, Gap)
# Add overall median row
overall_median_data <- df_2025 %>%
filter(!is.na(base_salary) & base_salary > 0) %>%
group_by(gender) %>%
summarise(median_salary = median(base_salary, na.rm = TRUE), .groups = "drop")
women_median <- ifelse(nrow(overall_median_data %>% filter(gender == "Women")) > 0,
overall_median_data %>% filter(gender == "Women") %>% pull(median_salary),
NA)
men_median <- ifelse(nrow(overall_median_data %>% filter(gender == "Men")) > 0,
overall_median_data %>% filter(gender == "Men") %>% pull(median_salary),
NA)
median_row <- data.frame(
specialization = "OVERALL MEDIAN",
Women = women_median,
Men = men_median,
Gap = round((1 - women_median/men_median) * 100, 1)
)
final_median_salary_table <- bind_rows(salary_median_table, median_row)
kable(final_median_salary_table,
caption = "Median Annual Base Salary by Specialization (Excluding Commissions and Bonuses)",
col.names = c("Specialization", "Women", "Men", "Gap %"),
format.args = list(big.mark = ",")) %>%
kable_styling(bootstrap_options = c("striped", "hover", "condensed")) %>%
row_spec(nrow(final_median_salary_table), bold = TRUE, background = "#27ae60", color = "white")
Median Annual Base Salary by Specialization (Excluding Commissions and
Bonuses)
|
Specialization
|
Women
|
Men
|
Gap %
|
|
Development
|
112,250
|
125,000
|
10.2
|
|
Finance
|
120,000
|
116,925
|
-2.6
|
|
Other
|
100,000
|
97,850
|
-2.2
|
|
Asset Management
|
120,000
|
87,250
|
-37.5
|
|
Brokerage
|
75,000
|
60,000
|
-25.0
|
|
OVERALL MEDIAN
|
115,000
|
113,500
|
-1.3
|
# Plot for median salaries
ggplot(median_salary_spec, aes(x = reorder(specialization_group, median_salary), y = median_salary, fill = gender)) +
geom_col(position = "dodge", alpha = 0.8, width = 0.7) +
geom_text(aes(label = format_currency(median_salary)),
position = position_dodge(width = 0.7), hjust = -0.1, size = 3.5) +
scale_fill_manual(values = gender_colors) +
scale_y_continuous(labels = format_currency, expand = expansion(mult = c(0, 0.15))) +
coord_flip() +
labs(
title = "Median Base Salary by Specialization and Gender",
subtitle = "50th percentile compensation levels",
x = "Specialization",
y = "Median Annual Base Salary",
fill = "Gender",
caption = "Source: 2025 Survey Data"
) +
theme_compensation

Commission and Bonus
Analysis by Specialization
Mean Commission and
Bonus by Specialization
df_2025_combined <- df_2025 %>%
mutate(commission_bonus = (replace_na(commission, 0) + replace_na(bonus, 0)))
combined_spec_data <- apply_compensation_weighting(df_2025_combined, "commission_bonus", "specialization_group")
commission_mean_table <- combined_spec_data %>%
select(specialization = group, gender, commission_bonus = mean_salary) %>%
pivot_wider(names_from = gender, values_from = commission_bonus, names_prefix = "") %>%
mutate(Gap = round((1 - Women/Men) * 100, 1)) %>%
arrange(desc(Men))
# Add overall mean row
overall_mean_women_cb <- combined_spec_data %>% filter(gender == "Women") %>% summarise(avg = mean(mean_salary)) %>% pull(avg)
overall_mean_men_cb <- combined_spec_data %>% filter(gender == "Men") %>% summarise(avg = mean(mean_salary)) %>% pull(avg)
mean_cb_row <- data.frame(
specialization = "OVERALL MEAN",
Women = overall_mean_women_cb,
Men = overall_mean_men_cb
) %>%
mutate(Gap = round((1 - Women/Men) * 100, 1))
final_mean_commission_table <- bind_rows(commission_mean_table, mean_cb_row)
kable(final_mean_commission_table,
caption = "Mean Annual Commission and Bonus Earnings by Specialization",
col.names = c("Specialization", "Women", "Men", "Gap %"),
format.args = list(big.mark = ",")) %>%
kable_styling(bootstrap_options = c("striped", "hover", "condensed")) %>%
row_spec(nrow(final_mean_commission_table), bold = TRUE, background = "#e74c3c", color = "white")
Mean Annual Commission and Bonus Earnings by Specialization
|
Specialization
|
Women
|
Men
|
Gap %
|
|
Brokerage
|
258,557.69
|
126,184.82
|
51.2
|
|
Development
|
156,694.89
|
37,157.93
|
76.3
|
|
Finance
|
45,353.13
|
57,482.75
|
-26.7
|
|
Asset Management
|
45,011.38
|
35,084.87
|
22.1
|
|
Other
|
44,905.67
|
28,842.93
|
35.8
|
|
OVERALL MEAN
|
110,104.55
|
56,950.66
|
48.3
|
# Plot for mean commission and bonus
ggplot(combined_spec_data, aes(x = reorder(group, mean_salary), y = mean_salary, fill = gender)) +
geom_col(position = "dodge", alpha = 0.8, width = 0.7) +
geom_text(aes(label = format_currency(mean_salary)),
position = position_dodge(width = 0.7), hjust = -0.1, size = 3.5) +
scale_fill_manual(values = gender_colors) +
scale_y_continuous(labels = format_currency, expand = expansion(mult = c(0, 0.15))) +
coord_flip() +
labs(
title = "Mean Commission and Bonus Earnings by Specialization and Gender",
subtitle = "Women's earnings weighted by 2020 age distribution",
x = "Specialization",
y = "Mean Annual Commission + Bonus",
fill = "Gender",
caption = "Source: 2025 Survey Data"
) +
theme_compensation

Median Commission
and Bonus by Specialization
# Calculate median commission and bonus by specialization
median_cb_spec <- df_2025_combined %>%
filter(!is.na(commission_bonus) & commission_bonus > 0 & !is.na(specialization_group)) %>%
group_by(specialization_group, gender) %>%
summarise(median_cb = median(commission_bonus, na.rm = TRUE), .groups = "drop")
commission_median_table <- median_cb_spec %>%
select(specialization = specialization_group, gender, median_cb) %>%
pivot_wider(names_from = gender, values_from = median_cb, names_prefix = "") %>%
arrange(desc(coalesce(Men, Women, 0)))
# Ensure we have columns for Women and Men even if missing
if(!"Women" %in% names(commission_median_table)) commission_median_table$Women <- NA
if(!"Men" %in% names(commission_median_table)) commission_median_table$Men <- NA
# Calculate gap and reorder columns
commission_median_table <- commission_median_table %>%
mutate(Gap = round((1 - Women/Men) * 100, 1)) %>%
select(specialization, Women, Men, Gap)
# Add overall median row
overall_median_cb_data <- df_2025_combined %>%
filter(!is.na(commission_bonus) & commission_bonus > 0) %>%
group_by(gender) %>%
summarise(median_cb = median(commission_bonus, na.rm = TRUE), .groups = "drop")
women_median_cb <- ifelse(nrow(overall_median_cb_data %>% filter(gender == "Women")) > 0,
overall_median_cb_data %>% filter(gender == "Women") %>% pull(median_cb),
NA)
men_median_cb <- ifelse(nrow(overall_median_cb_data %>% filter(gender == "Men")) > 0,
overall_median_cb_data %>% filter(gender == "Men") %>% pull(median_cb),
NA)
median_cb_row <- data.frame(
specialization = "OVERALL MEDIAN",
Women = women_median_cb,
Men = men_median_cb,
Gap = round((1 - women_median_cb/men_median_cb) * 100, 1)
)
final_median_commission_table <- bind_rows(commission_median_table, median_cb_row)
kable(final_median_commission_table,
caption = "Median Annual Commission and Bonus Earnings by Specialization",
col.names = c("Specialization", "Women", "Men", "Gap %"),
format.args = list(big.mark = ",")) %>%
kable_styling(bootstrap_options = c("striped", "hover", "condensed")) %>%
row_spec(nrow(final_median_commission_table), bold = TRUE, background = "#27ae60", color = "white")
Median Annual Commission and Bonus Earnings by Specialization
|
Specialization
|
Women
|
Men
|
Gap %
|
|
Brokerage
|
50,000.0
|
110,000
|
54.5
|
|
Finance
|
17,250.5
|
22,000
|
21.6
|
|
Other
|
10,000.0
|
20,000
|
50.0
|
|
Development
|
10,000.0
|
18,000
|
44.4
|
|
Asset Management
|
20,000.0
|
16,000
|
-25.0
|
|
OVERALL MEDIAN
|
15,000.0
|
20,000
|
25.0
|
# Plot for median commission and bonus
ggplot(median_cb_spec, aes(x = reorder(specialization_group, median_cb), y = median_cb, fill = gender)) +
geom_col(position = "dodge", alpha = 0.8, width = 0.7) +
geom_text(aes(label = format_currency(median_cb)),
position = position_dodge(width = 0.7), hjust = -0.1, size = 3.5) +
scale_fill_manual(values = gender_colors) +
scale_y_continuous(labels = format_currency, expand = expansion(mult = c(0, 0.15))) +
coord_flip() +
labs(
title = "Median Commission and Bonus Earnings by Specialization and Gender",
subtitle = "50th percentile performance-based compensation",
x = "Specialization",
y = "Median Annual Commission + Bonus",
fill = "Gender",
caption = "Source: 2025 Survey Data"
) +
theme_compensation

Total Compensation
Overview
total_comp_data <- apply_compensation_weighting(df_2025, "total_compensation")
pay_gap <- calculate_pay_gap(total_comp_data)
ggplot(total_comp_data, aes(x = gender, y = mean_salary, fill = gender)) +
geom_col(width = 0.6, alpha = 0.8) +
geom_text(aes(label = format_currency(mean_salary)),
vjust = -0.5, size = 5, fontface = "bold") +
scale_fill_manual(values = gender_colors) +
scale_y_continuous(labels = format_currency, expand = expansion(mult = c(0, 0.15))) +
labs(
title = "Total Annual Compensation by Gender",
subtitle = paste0("Women's compensation weighted by 2020 age distribution | Gender pay gap: ", pay_gap, "%"),
x = "Gender",
y = "Mean Total Compensation",
caption = "Source: 2025 Survey Data"
) +
theme_compensation +
guides(fill = "none")

Total Annual
Compensation Distribution
compensation_ranges <- df_2025 %>%
filter(!is.na(total_compensation) & total_compensation > 0 & !is.na(gender)) %>%
mutate(
salary_range = case_when(
total_compensation < 50000 ~ "Less than $50,000",
total_compensation >= 50000 & total_compensation <= 74999 ~ "$50,000 — $74,999",
total_compensation >= 75000 & total_compensation <= 99999 ~ "$75,000 — $99,999",
total_compensation >= 100000 & total_compensation <= 149999 ~ "$100,000 — $149,999",
total_compensation >= 150000 & total_compensation <= 199999 ~ "$150,000 — $199,999",
total_compensation >= 200000 & total_compensation <= 249999 ~ "$200,000 — $249,999",
total_compensation >= 250000 & total_compensation <= 349999 ~ "$250,000 — $349,999",
total_compensation >= 350000 & total_compensation <= 500000 ~ "$350,000 — $500,000",
total_compensation > 500000 ~ "More than $500,000",
TRUE ~ "Other"
),
salary_range = factor(salary_range, levels = c(
"Less than $50,000", "$50,000 — $74,999", "$75,000 — $99,999",
"$100,000 — $149,999", "$150,000 — $199,999", "$200,000 — $249,999",
"$250,000 — $349,999", "$350,000 — $500,000", "More than $500,000"
))
)
# Calculate percentages by gender and overall
comp_distribution <- compensation_ranges %>%
group_by(salary_range, gender) %>%
summarise(count = n(), .groups = "drop") %>%
group_by(gender) %>%
mutate(
total_by_gender = sum(count),
pct_by_gender = round((count / total_by_gender) * 100, 1)
) %>%
ungroup() %>%
select(salary_range, gender, count, pct_by_gender)
# Calculate overall percentages
overall_distribution <- compensation_ranges %>%
group_by(salary_range) %>%
summarise(total_count = n(), .groups = "drop") %>%
mutate(
total_respondents = sum(total_count),
pct_total = round((total_count / total_respondents) * 100, 1)
)
# Reshape for table format
comp_table <- comp_distribution %>%
select(salary_range, gender, pct_by_gender) %>%
pivot_wider(names_from = gender, values_from = pct_by_gender, values_fill = 0) %>%
left_join(overall_distribution %>% select(salary_range, pct_total), by = "salary_range") %>%
arrange(salary_range)
# Ensure we have columns for Women and Men even if missing
if(!"Women" %in% names(comp_table)) comp_table$Women <- 0
if(!"Men" %in% names(comp_table)) comp_table$Men <- 0
# Reorder columns
comp_table <- comp_table %>%
select(salary_range, Women, Men, pct_total) %>%
arrange(match(salary_range, levels(compensation_ranges$salary_range)))
kable(comp_table,
caption = "Total Annual Compensation Distribution (All Respondents)",
col.names = c("Compensation Range", "Women (%)", "Men (%)", "All Respondents (%)"),
digits = 1) %>%
kable_styling(bootstrap_options = c("striped", "hover", "condensed")) %>%
add_header_above(c(" " = 1, "Percentage in Each Range" = 3)) %>%
row_spec(0, bold = TRUE, color = "white", background = "#3498db")
Total Annual Compensation Distribution (All Respondents)
|
|
Percentage in Each Range
|
|
Compensation Range
|
Women (%)
|
Men (%)
|
All Respondents (%)
|
|
Less than $50,000
|
11.7
|
13.9
|
12.1
|
|
$50,000 — $74,999
|
5.6
|
4.8
|
5.5
|
|
$75,000 — $99,999
|
11.4
|
9.4
|
11.1
|
|
$100,000 — $149,999
|
24.7
|
17.8
|
23.8
|
|
$150,000 — $199,999
|
16.7
|
16.9
|
16.7
|
|
$200,000 — $249,999
|
9.9
|
13.6
|
10.4
|
|
$250,000 — $349,999
|
10.1
|
10.0
|
10.1
|
|
$350,000 — $500,000
|
5.2
|
6.6
|
5.4
|
|
More than $500,000
|
4.6
|
6.9
|
4.9
|
Compensation
Components Breakdown
comp_components <- list(
"Base Salary" = apply_compensation_weighting(df_2025, "base_salary"),
"Commission" = apply_compensation_weighting(df_2025, "commission"),
"Bonus" = apply_compensation_weighting(df_2025, "bonus"),
"Profit Sharing" = apply_compensation_weighting(df_2025, "profit_sharing"),
"Long-Term Incentive" = apply_compensation_weighting(df_2025, "long_term_incentive")
)
all_comp_data <- map_dfr(comp_components, identity, .id = "component") %>%
mutate(
component = factor(component, levels = c("Base Salary", "Commission", "Bonus",
"Profit Sharing", "Long-Term Incentive"))
)
total_comp_by_gender <- apply_compensation_weighting(df_2025, "total_compensation")
women_total <- total_comp_by_gender %>% filter(gender == "Women") %>% pull(mean_salary)
men_total <- total_comp_by_gender %>% filter(gender == "Men") %>% pull(mean_salary)
all_comp_data <- all_comp_data %>%
mutate(
total_comp = ifelse(gender == "Women", women_total, men_total),
percentage = round((mean_salary / total_comp) * 100, 1),
label = paste0(format_currency(mean_salary), "\n(", percentage, "%)")
)
ggplot(all_comp_data, aes(x = component, y = mean_salary, fill = gender)) +
geom_col(position = "dodge", alpha = 0.8, width = 0.7) +
geom_text(aes(label = label),
position = position_dodge(width = 0.7), vjust = -0.3, size = 3, fontface = "bold") +
scale_fill_manual(values = gender_colors) +
scale_y_continuous(labels = format_currency, expand = expansion(mult = c(0, 0.15))) +
labs(
title = "Compensation Components Analysis by Gender",
subtitle = "Dollar amounts and percentages of total compensation shown",
x = "Compensation Component",
y = "Mean Annual Amount",
fill = "Gender",
caption = "Source: 2025 Survey Data"
) +
theme_compensation +
theme(axis.text.x = element_text(angle = 45, hjust = 1))

Compensation by
Career Level
position_comp_data <- apply_compensation_weighting(df_2025, "total_compensation", "m3A")
position_comp_filtered <- position_comp_data %>%
filter(!group %in% c("Unemployed", "Retired")) %>%
mutate(
group = case_when(
str_detect(group, "C-Suite") ~ "C-Suite",
str_detect(group, "SVP|Vice President|Managing Director|Partner") ~ "VP/SVP/MD/Partner",
str_detect(group, "Senior level") ~ "Senior Level",
str_detect(group, "Mid-level|Associate") ~ "Mid-Level/Associate",
str_detect(group, "Entry level") ~ "Entry Level",
str_detect(group, "Self-employed|Independent") ~ "Self-Employed",
TRUE ~ group
)
) %>%
filter(!is.na(group))
position_order <- c("Entry Level", "Mid-Level/Associate", "Senior Level",
"VP/SVP/MD/Partner", "C-Suite", "Self-Employed")
position_comp_filtered <- position_comp_filtered %>%
mutate(group = factor(group, levels = position_order))
ggplot(position_comp_filtered, aes(x = group, y = mean_salary, fill = gender)) +
geom_col(position = "dodge", alpha = 0.8, width = 0.7) +
geom_text(aes(label = format_currency(mean_salary)),
position = position_dodge(width = 0.7), vjust = -0.3, size = 3.5) +
scale_fill_manual(values = gender_colors) +
scale_y_continuous(labels = format_currency, expand = expansion(mult = c(0, 0.15))) +
labs(
title = "Total Compensation by Position Level and Gender",
subtitle = "Clear hierarchy progression shown",
x = "Position Level",
y = "Mean Total Compensation",
fill = "Gender",
caption = "Source: 2025 Survey Data"
) +
theme_compensation +
theme(axis.text.x = element_text(angle = 45, hjust = 1, size = 9))

Compensation by Age
Groups
age_comp_data <- apply_compensation_weighting(df_2025, "total_compensation", "m25")
age_comp_filtered <- age_comp_data %>%
filter(!is.na(group)) %>%
mutate(group = factor(group, levels = age_order))
ggplot(age_comp_filtered, aes(x = group, y = mean_salary, fill = gender)) +
geom_col(position = "dodge", alpha = 0.8, width = 0.7) +
geom_text(aes(label = format_currency(mean_salary)),
position = position_dodge(width = 0.7), vjust = -0.3, size = 3.5) +
scale_fill_manual(values = gender_colors) +
scale_y_continuous(labels = format_currency, expand = expansion(mult = c(0, 0.15))) +
labs(
title = "Total Compensation by Age Group and Gender",
subtitle = "Career progression patterns visible across age groups",
x = "Age Group",
y = "Mean Total Compensation",
fill = "Gender",
caption = "Source: 2025 Survey Data"
) +
theme_compensation +
theme(axis.text.x = element_text(angle = 45, hjust = 1))

Diversity and Inclusion
Analysis
Compensation by Race
and Ethnicity
women_race_salary <- df_2025 %>%
filter(gender == "Women" & !is.na(ethnicity) & !is.na(base_salary) & base_salary > 0) %>%
group_by(age, ethnicity) %>%
summarise(mean_salary = mean(base_salary, na.rm = TRUE), .groups = "drop") %>%
left_join(women_age_weights, by = "age") %>%
filter(!is.na(weight)) %>%
group_by(ethnicity) %>%
summarise(
weighted_mean = sum(mean_salary * weight, na.rm = TRUE),
.groups = "drop"
) %>%
mutate(gender = "Women")
men_race_salary <- df_2025 %>%
filter(gender == "Men" & !is.na(ethnicity) & !is.na(base_salary) & base_salary > 0) %>%
group_by(ethnicity) %>%
summarise(weighted_mean = mean(base_salary, na.rm = TRUE), .groups = "drop") %>%
mutate(gender = "Men")
all_race_salary <- bind_rows(women_race_salary, men_race_salary) %>%
pivot_wider(names_from = gender, values_from = weighted_mean, values_fill = NA) %>%
arrange(desc(Men))
overall_women <- women_race_salary %>% summarise(avg = mean(weighted_mean)) %>% pull(avg)
overall_men <- men_race_salary %>% summarise(avg = mean(weighted_mean)) %>% pull(avg)
all_row <- data.frame(
ethnicity = "ALL",
Women = overall_women,
Men = overall_men
)
final_race_salary_table <- bind_rows(all_race_salary, all_row)
kable(final_race_salary_table,
caption = "Mean Base Salaries by Race and Gender",
col.names = c("Race/Ethnicity", "Women", "Men"),
format.args = list(big.mark = ",")) %>%
kable_styling(bootstrap_options = c("striped", "hover", "condensed")) %>%
row_spec(nrow(final_race_salary_table), bold = TRUE, background = "#e74c3c", color = "white")
Mean Base Salaries by Race and Gender
|
Race/Ethnicity
|
Women
|
Men
|
|
White
|
122,665.2
|
142,282.65
|
|
Other/Mixed
|
118,727.2
|
133,919.00
|
|
Black
|
101,784.4
|
117,911.00
|
|
Hispanic/Latinx
|
131,802.8
|
116,265.17
|
|
Asian
|
129,156.8
|
98,435.73
|
|
ALL
|
120,827.3
|
121,762.71
|
Commission and Bonus
by Race
women_race_cb <- df_2025 %>%
mutate(commission_bonus = (replace_na(commission, 0) + replace_na(bonus, 0))) %>%
filter(gender == "Women" & !is.na(ethnicity) & !is.na(commission_bonus) & commission_bonus > 0) %>%
group_by(age, ethnicity) %>%
summarise(mean_cb = mean(commission_bonus, na.rm = TRUE), .groups = "drop") %>%
left_join(women_age_weights, by = "age") %>%
filter(!is.na(weight)) %>%
group_by(ethnicity) %>%
summarise(
weighted_mean = sum(mean_cb * weight, na.rm = TRUE),
.groups = "drop"
) %>%
mutate(gender = "Women")
men_race_cb <- df_2025 %>%
mutate(commission_bonus = (replace_na(commission, 0) + replace_na(bonus, 0))) %>%
filter(gender == "Men" & !is.na(ethnicity) & !is.na(commission_bonus) & commission_bonus > 0) %>%
group_by(ethnicity) %>%
summarise(weighted_mean = mean(commission_bonus, na.rm = TRUE), .groups = "drop") %>%
mutate(gender = "Men")
all_race_cb <- bind_rows(women_race_cb, men_race_cb) %>%
pivot_wider(names_from = gender, values_from = weighted_mean, values_fill = NA) %>%
arrange(desc(Men))
overall_women_cb <- women_race_cb %>% summarise(avg = mean(weighted_mean)) %>% pull(avg)
overall_men_cb <- men_race_cb %>% summarise(avg = mean(weighted_mean)) %>% pull(avg)
all_cb_row <- data.frame(
ethnicity = "ALL",
Women = overall_women_cb,
Men = overall_men_cb
)
final_race_cb_table <- bind_rows(all_race_cb, all_cb_row)
kable(final_race_cb_table,
caption = "Mean Commission and Bonuses by Race and Gender",
col.names = c("Race/Ethnicity", "Women", "Men"),
format.args = list(big.mark = ",")) %>%
kable_styling(bootstrap_options = c("striped", "hover", "condensed")) %>%
row_spec(nrow(final_race_cb_table), bold = TRUE, background = "#e74c3c", color = "white")
Mean Commission and Bonuses by Race and Gender
|
Race/Ethnicity
|
Women
|
Men
|
|
Other/Mixed
|
31,986.82
|
154,000.00
|
|
Hispanic/Latinx
|
57,995.87
|
109,548.18
|
|
White
|
55,185.12
|
88,577.72
|
|
Black
|
43,298.58
|
40,173.29
|
|
Asian
|
25,470.11
|
21,555.56
|
|
ALL
|
42,787.30
|
82,770.95
|
Compensation by
Specialization and Race
spec_race_data <- df_2025 %>%
filter(!is.na(ethnicity) & !is.na(specialization_group) &
!is.na(total_compensation) & total_compensation > 0 &
specialization_group != "Other") %>%
group_by(specialization_group, ethnicity) %>%
summarise(
avg_salary = mean(total_compensation, na.rm = TRUE),
.groups = "drop"
)
spec_averages <- df_2025 %>%
filter(!is.na(specialization_group) & !is.na(total_compensation) &
total_compensation > 0 & specialization_group != "Other") %>%
group_by(specialization_group) %>%
summarise(overall_avg = mean(total_compensation, na.rm = TRUE), .groups = "drop")
spec_race_analysis <- spec_race_data %>%
left_join(spec_averages, by = "specialization_group") %>%
mutate(
difference = avg_salary - overall_avg,
difference_formatted = ifelse(difference >= 0,
paste0("+$", format(round(difference), big.mark = ",")),
paste0("-$", format(abs(round(difference)), big.mark = ",")))
) %>%
select(specialization_group, ethnicity, avg_salary, difference_formatted)
unique_specs <- unique(spec_race_analysis$specialization_group)
if(length(unique_specs) > 0) {
for(spec in unique_specs) {
spec_table <- spec_race_analysis %>%
filter(specialization_group == spec) %>%
select(ethnicity, avg_salary, difference_formatted) %>%
arrange(desc(avg_salary))
if(nrow(spec_table) > 0) {
cat("\n")
cat("###", toupper(spec), "SPECIALIZATION\n\n")
table_output <- kable(spec_table,
col.names = c("Race/Ethnicity", "Average", "Difference relative to average"),
format.args = list(big.mark = ","),
caption = paste(spec, "Compensation by Race/Ethnicity")) %>%
kable_styling(bootstrap_options = c("striped", "hover", "condensed"))
cat(table_output)
cat("\n\n")
}
}
} else {
cat("Insufficient data for specialization by race analysis\n")
}
ASSET MANAGEMENT
SPECIALIZATION
Asset Management Compensation by Race/Ethnicity
|
Race/Ethnicity
|
Average
|
Difference relative to average
|
|
Asian
|
250,578.0
|
+$ 67,158
|
|
White
|
169,544.2
|
-$ 13,876
|
|
Hispanic/Latinx
|
160,521.4
|
-$ 22,899
|
|
Other/Mixed
|
143,257.8
|
-$ 40,162
|
|
Black
|
133,990.1
|
-$ 49,430
|
BROKERAGE
SPECIALIZATION
Brokerage Compensation by Race/Ethnicity
|
Race/Ethnicity
|
Average
|
Difference relative to average
|
|
White
|
224,421.4
|
+$ 1,641
|
|
Black
|
153,500.0
|
-$ 69,280
|
|
Hispanic/Latinx
|
152,416.7
|
-$ 70,364
|
|
Asian
|
132,000.0
|
-$ 90,780
|
|
Other/Mixed
|
110,000.0
|
-$112,780
|
DEVELOPMENT
SPECIALIZATION
Development Compensation by Race/Ethnicity
|
Race/Ethnicity
|
Average
|
Difference relative to average
|
|
Hispanic/Latinx
|
174,942.0
|
-$ 32,985
|
|
White
|
173,475.5
|
-$ 34,451
|
|
Other/Mixed
|
164,706.0
|
-$ 43,221
|
|
Asian
|
156,321.7
|
-$ 51,605
|
|
Black
|
131,727.4
|
-$ 76,199
|
FINANCE
SPECIALIZATION
Finance Compensation by Race/Ethnicity
|
Race/Ethnicity
|
Average
|
Difference relative to average
|
|
Hispanic/Latinx
|
225,270.9
|
+$ 8,165
|
|
White
|
221,546.2
|
+$ 4,441
|
|
Asian
|
179,004.6
|
-$ 38,101
|
|
Other/Mixed
|
170,538.3
|
-$ 46,567
|
|
Black
|
150,328.2
|
-$ 66,777
|
Career Dynamics and
Preferences
Compensation Changes
and Projections
comp_change_data <- apply_women_weighting(df_2025, "m22A")
comp_change_filtered <- comp_change_data %>%
filter(!is.na(response)) %>%
mutate(
response = case_when(
response == "Increase:" ~ "Increased",
response == "Decrease:" ~ "Decreased",
response == "Stayed the same" ~ "Stayed the Same",
TRUE ~ response
)
)
ggplot(comp_change_filtered, aes(x = response, y = weighted_percent, fill = gender)) +
geom_col(position = "dodge", alpha = 0.8, width = 0.7) +
geom_text(aes(label = paste0(round(weighted_percent, 1), "%")),
position = position_dodge(width = 0.7), vjust = -0.5, size = 4, fontface = "bold") +
scale_fill_manual(values = gender_colors) +
scale_y_continuous(labels = function(x) paste0(x, "%"), expand = expansion(mult = c(0, 0.15))) +
labs(
title = "Compensation Changes from 2023-2024 by Gender",
subtitle = "Women's responses weighted by 2020 age distribution",
x = "Compensation Change Direction",
y = "Percentage of Respondents",
fill = "Gender",
caption = "Source: 2025 Survey Data"
) +
theme_compensation

proj_change_data <- apply_women_weighting(df_2025, "m22C")
proj_change_filtered <- proj_change_data %>%
filter(!is.na(response)) %>%
mutate(
response = case_when(
response == "Increase:" ~ "Expect Increase",
response == "Decrease:" ~ "Expect Decrease",
response == "Stay the same" ~ "Expect No Change",
TRUE ~ response
)
)
ggplot(proj_change_filtered, aes(x = response, y = weighted_percent, fill = gender)) +
geom_col(position = "dodge", alpha = 0.8, width = 0.7) +
geom_text(aes(label = paste0(round(weighted_percent, 1), "%")),
position = position_dodge(width = 0.7), vjust = -0.5, size = 4, fontface = "bold") +
scale_fill_manual(values = gender_colors) +
scale_y_continuous(labels = function(x) paste0(x, "%"), expand = expansion(mult = c(0, 0.15))) +
labs(
title = "Projected 2025 Compensation Changes by Gender",
subtitle = "Women's responses weighted by 2020 age distribution",
x = "Expected Compensation Change",
y = "Percentage of Respondents",
fill = "Gender",
caption = "Source: 2025 Survey Data"
) +
theme_compensation

Commission-Based
Career Preferences
commission_data <- apply_women_weighting(df_2025, "m23B")
commission_filtered <- commission_data %>%
filter(!is.na(response))
ggplot(commission_filtered, aes(x = response, y = weighted_percent, fill = gender)) +
geom_col(position = "dodge", alpha = 0.8, width = 0.7) +
geom_text(aes(label = paste0(round(weighted_percent, 1), "%")),
position = position_dodge(width = 0.7), vjust = -0.5, size = 4, fontface = "bold") +
scale_fill_manual(values = gender_colors) +
scale_y_continuous(labels = function(x) paste0(x, "%"), expand = expansion(mult = c(0, 0.15))) +
labs(
title = "Willingness to Accept Commission-Based Positions by Gender",
subtitle = "Women's responses weighted by 2020 age distribution",
x = "Commission Willingness",
y = "Percentage of Respondents",
fill = "Gender",
caption = "Source: 2025 Survey Data"
) +
theme_compensation

comp_structure_data <- apply_women_weighting(df_2025, "m24")
comp_structure_filtered <- comp_structure_data %>%
filter(!is.na(response)) %>%
mutate(
response = case_when(
str_detect(response, "not part of the compensation structure") ~ "Commissions Not\nPart of Career",
str_detect(response, "actively pursued") ~ "Actively Pursued\nCommission Career",
str_detect(response, "does not play a significant role") ~ "Commission Role\nNot Significant",
str_detect(response, "altered.*to avoid") ~ "Altered Career to\nAvoid Commission",
response == "None of these" ~ "None of These",
TRUE ~ str_wrap(response, 20)
)
)
ggplot(comp_structure_filtered, aes(x = reorder(response, weighted_percent), y = weighted_percent, fill = gender)) +
geom_col(position = "dodge", alpha = 0.8, width = 0.7) +
geom_text(aes(label = paste0(round(weighted_percent, 1), "%")),
position = position_dodge(width = 0.7), hjust = -0.1, size = 3.5) +
scale_fill_manual(values = gender_colors) +
scale_y_continuous(labels = function(x) paste0(x, "%"), expand = expansion(mult = c(0, 0.15))) +
coord_flip() +
labs(
title = "Role of Compensation Structure in Career Path by Gender",
subtitle = "Women's responses weighted by 2020 age distribution",
x = "Career Path Relationship to Commission",
y = "Percentage of Respondents",
fill = "Gender",
caption = "Source: 2025 Survey Data"
) +
theme_compensation +
theme(axis.text.y = element_text(size = 9))

Work-Life Balance and
Career Satisfaction
Priority and
Satisfaction Metrics
df_2025_clean_scales <- df_2025 %>%
mutate(
family_time_importance = clean_numeric_column(`m14A[SQ001]`),
career_satisfaction = clean_numeric_column(`m14B[SQ007]`),
earnings_importance = clean_numeric_column(`m14A[SQ009]`)
)
family_time_data <- apply_compensation_weighting(df_2025_clean_scales, "family_time_importance")
ggplot(family_time_data, aes(x = gender, y = mean_salary, fill = gender)) +
geom_col(width = 0.6, alpha = 0.8) +
geom_text(aes(label = round(mean_salary, 1)),
vjust = -0.5, size = 5, fontface = "bold") +
scale_fill_manual(values = gender_colors) +
scale_y_continuous(expand = expansion(mult = c(0, 0.15))) +
labs(
title = "Importance of Family Time by Gender",
subtitle = "Scale: 1 (Not Important) to 10 (Very Important)",
x = "Gender",
y = "Mean Importance Score",
caption = "Source: 2025 Survey Data"
) +
theme_compensation +
guides(fill = "none")

career_sat_data <- apply_compensation_weighting(df_2025_clean_scales, "career_satisfaction")
ggplot(career_sat_data, aes(x = gender, y = mean_salary, fill = gender)) +
geom_col(width = 0.6, alpha = 0.8) +
geom_text(aes(label = round(mean_salary, 1)),
vjust = -0.5, size = 5, fontface = "bold") +
scale_fill_manual(values = gender_colors) +
scale_y_continuous(expand = expansion(mult = c(0, 0.15))) +
labs(
title = "Career Achievement Satisfaction by Gender",
subtitle = "Scale: 1 (Not Satisfied) to 10 (Very Satisfied)",
x = "Gender",
y = "Mean Satisfaction Score",
caption = "Source: 2025 Survey Data"
) +
theme_compensation +
guides(fill = "none")

earnings_imp_data <- apply_compensation_weighting(df_2025_clean_scales, "earnings_importance")
ggplot(earnings_imp_data, aes(x = gender, y = mean_salary, fill = gender)) +
geom_col(width = 0.6, alpha = 0.8) +
geom_text(aes(label = round(mean_salary, 1)),
vjust = -0.5, size = 5, fontface = "bold") +
scale_fill_manual(values = gender_colors) +
scale_y_continuous(expand = expansion(mult = c(0, 0.15))) +
labs(
title = "Importance of Maximizing Earnings Potential by Gender",
subtitle = "Scale: 1 (Not Important) to 10 (Very Important)",
x = "Gender",
y = "Mean Importance Score",
caption = "Source: 2025 Survey Data"
) +
theme_compensation +
guides(fill = "none")

Executive Summary
Key Gender Pay Gap
Findings
base_salary_gap <- calculate_pay_gap(apply_compensation_weighting(df_2025, "base_salary"))
commission_gap <- calculate_pay_gap(apply_compensation_weighting(df_2025, "commission"))
bonus_gap <- calculate_pay_gap(apply_compensation_weighting(df_2025, "bonus"))
total_comp_gap <- calculate_pay_gap(apply_compensation_weighting(df_2025, "total_compensation"))
summary_gaps <- data.frame(
Component = c("Base Salary", "Commission", "Bonus", "Total Compensation"),
`Gender_Gap_Percent` = c(base_salary_gap, commission_gap, bonus_gap, total_comp_gap)
)
kable(summary_gaps,
caption = "Gender Pay Gaps Summary - Key Compensation Components",
col.names = c("Compensation Component", "Gender Gap (%)"),
digits = 1) %>%
kable_styling(bootstrap_options = c("striped", "hover", "condensed")) %>%
row_spec(0, bold = TRUE, color = "white", background = "#e74c3c")
Gender Pay Gaps Summary - Key Compensation Components
|
Compensation Component
|
Gender Gap (%)
|
|
Base Salary
|
19.9
|
|
Commission
|
29.0
|
|
Bonus
|
61.2
|
|
Total Compensation
|
36.0
|
Compensation Source
Distribution
compensation_sources_2025 <- df_2025 %>%
filter(!is.na(total_compensation) & total_compensation > 0 &
!is.na(base_salary) & !is.na(bonus) & !is.na(commission) &
!is.na(profit_sharing) & !is.na(long_term_incentive)) %>%
mutate(
component_sum = base_salary + bonus + commission + profit_sharing + long_term_incentive,
base_pct = (base_salary / component_sum) * 100,
bonus_pct = (bonus / component_sum) * 100,
commission_pct = (commission / component_sum) * 100,
profit_pct = (profit_sharing / component_sum) * 100,
lti_pct = (long_term_incentive / component_sum) * 100
) %>%
filter(component_sum > 0) %>%
group_by(gender) %>%
summarise(
Annual_Salary = round(mean(base_pct, na.rm = TRUE)),
Short_Term_Bonus = round(mean(bonus_pct, na.rm = TRUE)),
Commission = round(mean(commission_pct, na.rm = TRUE)),
Profit_Sharing = round(mean(profit_pct, na.rm = TRUE)),
Long_Term_Incentive = round(mean(lti_pct, na.rm = TRUE)),
.groups = "drop"
)
if(nrow(compensation_sources_2025) > 0) {
sources_table <- compensation_sources_2025 %>%
pivot_longer(cols = -gender, names_to = "Component", values_to = "Percentage") %>%
pivot_wider(names_from = gender, values_from = Percentage) %>%
mutate(
Component = case_when(
Component == "Annual_Salary" ~ "Annual Salary",
Component == "Short_Term_Bonus" ~ "Short-Term Incentive Bonus",
Component == "Commission" ~ "Commission",
Component == "Profit_Sharing" ~ "Profit Sharing",
Component == "Long_Term_Incentive" ~ "Long-Term Incentive",
TRUE ~ Component
)
)
if(!"Women" %in% names(sources_table)) sources_table$Women <- NA
if(!"Men" %in% names(sources_table)) sources_table$Men <- NA
sources_table <- sources_table %>%
select(Component, Women, Men)
kable(sources_table,
caption = "Sources of Compensation by Component (Percentage of Total Compensation)",
col.names = c("Compensation Component", "Women (%)", "Men (%)"),
digits = 0) %>%
kable_styling(bootstrap_options = c("striped", "hover", "condensed"))
}
Sources of Compensation by Component (Percentage of Total Compensation)
|
Compensation Component
|
Women (%)
|
Men (%)
|
|
Annual Salary
|
76
|
66
|
|
Short-Term Incentive Bonus
|
9
|
8
|
|
Commission
|
9
|
19
|
|
Profit Sharing
|
5
|
5
|
|
Long-Term Incentive
|
2
|
2
|